#!/bin/bash

# Absolute path of the model checkpoint handed to `--model_name_or_path` below.
model_path="/data/nfs-ten1/nfs/meichaoyang001/model/Qwen1.5-14B-Chat_240216"

# Weights & Biases credentials must come from the caller's environment (or a
# secrets manager), never from a literal in this script. If the caller already
# provided a key, make sure it is exported to child processes; otherwise leave
# it unset.
# NOTE(review): the key that used to be hardcoded here should be treated as
# leaked and revoked in the W&B account settings.
if [[ -n "${WANDB_API_KEY:-}" ]]; then
  export WANDB_API_KEY
fi

# W&B reporting is switched off for this run, so no key is required.
export WANDB_DISABLED=true


#eval "$(conda shell.bash hook)"
#conda activate /data/nfs-ten1/nfs/meichaoyang001/envs/llama_240508_cuda12_2


# Start the LLaMA-Factory API server for the model at $model_path using the
# vLLM inference backend and the `qwen` chat template.
#   VLLM_WORKER_MULTIPROC_METHOD=spawn  set only for this command; presumably
#       makes vLLM start its worker processes with "spawn" rather than "fork"
#       (confirm against the vLLM environment-variable docs).
#   --vllm_maxlen / --vllm_gpu_util / --vllm_enforce_eager  vLLM engine
#       settings; presumably max sequence length, GPU memory fraction and
#       eager-mode toggle (confirm against the LLaMA-Factory argument docs).
# The model path is quoted so it reaches the CLI as a single argument even if
# it ever contains whitespace or glob characters.
VLLM_WORKER_MULTIPROC_METHOD=spawn llamafactory-cli api \
    --model_name_or_path "$model_path" \
    --template qwen \
    --infer_backend vllm \
    --vllm_maxlen 30210 \
    --vllm_gpu_util 0.95 \
    --vllm_enforce_eager false